*===============================================================================
* Program: merge2.ado (generalized merge)
* Purpose: Generalizes the merge program in Stata to allow for:
*          (1) using datasets of non-dta filetype
*          (2) merge keys with different names in master/using datasets
* Version: 0.1
* Date: May 2018
* Author: Michael Droste
*===============================================================================
 
program define merge2
* merge2: wrapper around -merge- that additionally allows
*   (1) a using dataset stored as a .csv file (imported into a tempfile first)
*   (2) merge keys whose names differ between master and using data, via
*       using_keys(); the i-th master key is matched to the i-th using key.
*
* Syntax:
*   merge2 {1:1|m:1|1:m|m:m} master_keys using filename [, using_keys(names) merge_options]
*   merge2 1:1 _n using filename [, merge_options]
*
* All remaining options are passed through to -merge- unchanged.
* Validation failures detected here exit with return code 1; errors raised by
* -merge- itself are returned with -merge-'s own return code, after the master
* key names have been restored.

version 13.0

* generate() names a NEW variable, so it is parsed as a name (not a varlist,
* which would require the variable to exist already).
syntax anything using/, ///
	[KEEPUSing(namelist) ///
	 GENerate(name) ///
	 NOGENerate ///
	 NOLabel ///
	 NONotes ///
	 update ///
	 replace ///
	 NOREPort ///
	 force ///
	 assert(string asis) ///
	 keep(string asis) sorted ///
	 using_keys(string asis) ///
	]

*-------------------------------------------------------------------------------
* Setup
*-------------------------------------------------------------------------------

* NOTE: this is a session-wide setting and stays in effect after merge2 returns.
set more off

* Get the merge type and keys to merge from master dataset
gettoken merge_type master_keys : anything
* Strip the leading/extra blanks gettoken leaves behind so that the key list
* can be compared against "_n" regardless of how the command was spaced.
local master_keys : list retokenize master_keys
local using_keys : list retokenize using_keys
local num_master_keys: word count `master_keys'
local num_using_keys: word count `using_keys'
local obs_merge = ("`master_keys'" == "_n")

di "master keys: `master_keys'"
di "using keys: `using_keys'"
di "num master keys: `num_master_keys'"
di "num using keys: `num_using_keys'"

*-------------------------------------------------------------------------------
* Exception handling
*-------------------------------------------------------------------------------

* Check if merge type correctly specified
if "`merge_type'"!="1:1" & "`merge_type'"!="m:1" & "`merge_type'"!="1:m" & "`merge_type'"!="m:m" {
	di in red "Merge type (`merge_type') incorrectly specified (valid input is 1:1, m:1, 1:m, or m:m) - exiting."
	exit 1
}

* Check if master keys are specified (must run before any check that uses them)
if `num_master_keys'==0 {
	di in red "No merge key variables from master dataset specified - exiting."
	exit 1
}

* If doing an observation merge, using keys should not be specified
if `obs_merge' & `num_using_keys'>0 {
	di in red "Merge by observation (`master_keys') incompatible with using_keys() - exiting."
	exit 1
}

* If using keys are specified, make sure there are the same number as master keys
if `num_using_keys'!=0 & `num_using_keys'!=`num_master_keys' {
	di in red "Number of using keys (`num_using_keys') not equal to number of master keys (`num_master_keys') - exiting."
	exit 1
}

* Check if each master key is actually in dataset
if !`obs_merge' {
	foreach v in `master_keys' {
		capture confirm variable `v', exact
		if _rc {
			di in red "Merge key variable `v' is not in the master dataset - exiting."
			exit 1
		}
	}
}

* Check if master keys uniquely identify data when doing 1:1 or 1:m merge.
* The duplicate tag is counted over ALL observations; a tempvar avoids name
* clashes with user variables and is dropped automatically on exit.
if ("`merge_type'"=="1:1" | "`merge_type'"=="1:m") & !`obs_merge' {
	tempvar dup_tag
	qui duplicates tag `master_keys', gen(`dup_tag')
	qui count if `dup_tag' > 0
	if r(N) > 0 {
		di in red "Master keys do not uniquely identify observations in the master data - exiting."
		exit 1
	}
	qui drop `dup_tag'
}

* Check if using file exists (remote http/https paths are not checked locally)
local is_url = (strpos(`"`using'"',"http://")>0 | strpos(`"`using'"',"https://")>0)
capture confirm file `"`using'"'
if _rc & !`is_url' {
	capture confirm file `"`using'.dta"'
	if _rc {
		di in red "Couldn't find using dataset - exiting."
		exit 1
	}
	else {
		di "Extension for using dataset not specified, .dta assumed."
		local using `"`using'.dta"'
	}
}

*-------------------------------------------------------------------------------
* If using dataset isn't .dta, then we need to import it instead
*-------------------------------------------------------------------------------

if strpos(lower(`"`using'"'),".csv")>0 {
	tempfile merge2_temp_using
	* preserve/restore keeps the master data safe even if the import fails
	preserve
	qui import delimited using `"`using'"', clear
	qui save `"`merge2_temp_using'"'
	restore
	* Point the merge at the converted tempfile (its path, not its macro name)
	local using `"`merge2_temp_using'"'
}

*-------------------------------------------------------------------------------
* If using_keys specified, temporarily rename master keys
*-------------------------------------------------------------------------------

* Collect only the key pairs whose names actually differ; they are renamed in
* a single group rename so overlapping/swapped names are handled in one step.
local rename_from
local rename_to
if `num_using_keys'>0 {
	forvalues i = 1/`num_using_keys' {
		local key_m : word `i' of `master_keys'
		local key_u : word `i' of `using_keys'
		if "`key_m'"!="`key_u'" {
			local rename_from `rename_from' `key_m'
			local rename_to `rename_to' `key_u'
		}
	}
	if "`rename_from'"!="" rename (`rename_from') (`rename_to')
}

* Specify which keys to use
local keys `master_keys'
if (`num_using_keys'> 0) local keys `using_keys'

*-------------------------------------------------------------------------------
* Merge master with using data
*-------------------------------------------------------------------------------

* Only pass through the options the caller actually specified
local mergeopts `nogenerate' `nolabel' `nonotes' `update' `replace' `noreport' `force' `sorted'
if `"`keepusing'"'!="" local mergeopts `mergeopts' keepusing(`keepusing')
if `"`generate'"'!=""  local mergeopts `mergeopts' generate(`generate')
if `"`assert'"'!=""    local mergeopts `mergeopts' assert(`assert')
if `"`keep'"'!=""      local mergeopts `mergeopts' keep(`keep')

* Capture the return code so the master key names can be restored even when
* the merge itself fails; the error output of -merge- is still shown.
capture noisily merge `merge_type' `keys' using `"`using'"', `mergeopts'
local merge_rc = _rc

*-------------------------------------------------------------------------------
* If using_keys specified (master keys renamed), change back master key names
*-------------------------------------------------------------------------------

if "`rename_from'"!="" {
	* After a failed merge, restoring names is best effort so that the
	* return code reported to the caller is the one from -merge-.
	if `merge_rc' capture rename (`rename_to') (`rename_from')
	else rename (`rename_to') (`rename_from')
}

if `merge_rc' exit `merge_rc'

end
